1 Intro

library(synapser)
library(tidyverse)
synLogin()
## Welcome, Robert Allaway!
## NULL
library(circlize)


# get counts file
counts <- synGet("syn29532377", version = 2)$path %>% readr::read_tsv() 
# get metadata
query <- "SELECT * FROM syn11601459 where assay = 'rnaSeq' and fileFormat = 'sf' and name = 'quant.sf'"
metadata <- synTableQuery(query)$asDataFrame() 
## 
 [####################]100.00%   1/1   Done...    
Downloading  [####################]100.00%   10.5kB/10.5kB (6.0MB/s) SYNAPSE_TABLE_QUERY_106582141.csv Done...
counts_tidy <- counts %>% 
  group_by(gene_id) %>% 
  gather(contains("NF"), key = "cell_line", value = "counts") %>% 
  rename(ensembl_gene_id_version = gene_id) %>% 
  ungroup()
#source https://www.biostars.org/p/44426/
library(biomaRt)
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
## Ensembl site unresponsive, trying asia mirror
## Ensembl site unresponsive, trying uswest mirror
## Ensembl site unresponsive, trying useast mirror
gene_list <- getBM(attributes = c("ensembl_gene_id_version", "chromosome_name", "start_position", "end_position"),
                   values     = counts$gene_id,
                   filter     = "ensembl_gene_id_version",
                   mart       = mart)

counts_tidy_2 <- inner_join(counts_tidy, gene_list) %>% 
  filter(!cell_line %in% c("hTERT_NF1_ipNF05.5","hTERT_NF1_ipNF95.11b_C","hTERT_NF1_ipNF95.6"))
## Joining, by = "ensembl_gene_id_version"
  ##no matched cnv data for pnf, remove these

1.1 Get CNV bed files

query <- synTableQuery("SELECT * FROM syn35928271.2")$asDataFrame() 
## 
 [####################]100.00%   1/1   Done...    
Downloading  [####################]100.00%   4.1kB/4.1kB (11.3MB/s) SYNAPSE_TABLE_QUERY_106582322.csv Done...
bed_ents <- lapply(query$id, synGet)
beds <- lapply(bed_ents, function(x){
  readr::read_delim(x$path, col_names = c('chr','start','end','value1'))
  })
## Rows: 3770 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3912 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3451 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3670 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3682 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3668 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3321 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3733 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4023 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4030 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3943 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3991 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3267 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 4422 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (1): chr
## dbl (3): start, end, value1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
bed_names <- lapply(bed_ents, function(x) {stringr::str_remove(x$properties$name, ".bed")}) 
names(beds) <- bed_names
reorder_beds <- c("28cNF", "i28cNF", "cNF04.9a", "icNF04.9a", "cNF00.10a", "icNF00.10a", "cNF97.2a", "icNF97.2a", "cNF97.2b", "icNF97.2b", "cNF98.4c", "icNF98.4c", "cNF98.4d", "icNF98.4d")
beds <- beds[reorder_beds]

1.2 Plot circos plots of CNV vs gene expression

for(i in unique(counts_tidy_2$cell_line)){
  counts_bed <- counts_tidy_2 %>% filter(cell_line == i) %>% 
    dplyr::select(chromosome_name, start_position, end_position, counts) %>% 
    set_names(c("chr", "start", "end", "value1")) %>% 
    mutate(chr = paste0("chr", chr)) %>% 
    mutate(value1 = log10(value1+1))
  
  cnv_bed <- beds[[i]] %>% 
    mutate(value1 = log10(value1+1))
  
  circos.initializeWithIdeogram(species = "hg38") 
 
  circos.genomicTrackPlotRegion(counts_bed, numeric.column = c("value1"), 
    panel.fun = function(region, value, ...) {
        circos.genomicPoints(region, value, pch = 20, cex = 0.1, col = '#003554')
    })
  
  circos.genomicTrackPlotRegion(cnv_bed, numeric.column = c("value1"), 
    panel.fun = function(region, value, ...) {
        circos.genomicLines(region, value, col = "#FC6471", area = T)
    })
  
  title(i)
}

Plot again, but this time to save PDFs.

for(i in unique(counts_tidy_2$cell_line)){
  counts_bed <- counts_tidy_2 %>% filter(cell_line == i) %>% 
    dplyr::select(chromosome_name, start_position, end_position, counts) %>% 
    set_names(c("chr", "start", "end", "value1")) %>% 
    mutate(chr = paste0("chr", chr)) %>% 
    mutate(value1 = log10(value1+1))
  
  cnv_bed <- beds[[i]] %>% 
    mutate(value1 = log10(value1+1))
  
  filename <- glue::glue("../figures/{i}_cnv_circos.pdf")

  pdf(filename)
  
  circos.initializeWithIdeogram(species = "hg38") 
 
  circos.genomicTrackPlotRegion(counts_bed, numeric.column = c("value1"), 
    panel.fun = function(region, value, ...) {
        circos.genomicPoints(region, value, pch = 20, cex = 0.1, col = '#003554')
    })
  
  circos.genomicTrackPlotRegion(cnv_bed, numeric.column = c("value1"), 
    panel.fun = function(region, value, ...) {
        circos.genomicLines(region, value, col = "#FC6471", area = T)
    })
  
  title(i)
  
  dev.off()
}